// Table Construction for "The Impact of Open Access Mandates..." Bryan/Ozcan RESTAT 2020
// STATA SE 15
// See Data Appendix online for further details on raw data

// Raw Article Data downloaded from PubMed
// Raw Patent Data from USPTO
// Figure A1 from NIH data
// Figure A6 produced using raw data
// Table A11 produced using raw data 

// Change working directory here if needed.
// The path below only exists on the original machine. -capture- keeps the script from
// aborting elsewhere; it then continues in the current working directory, which must
// contain the data files.
capture cd "C:\Users\kevin.bryan\Dropbox\economics\2014 Yasin Management OA Paper"
if _rc {
    display as text "note: default project folder not found; using the current working directory"
}
// install coefplot if needed (user-written command used for the coefficient plots below)
capture which coefplot
if _rc {
    ssc install coefplot
}


/* -------------------- IN TEXT CITATIONS --------------------------*/
use Sept2015Crosssectiondataset, clear

// Quadratic term in publication month
gen monthtag2 = monthtag^2

// In the raw data weightedcites is the exact count of future citations; following
// Trajtenberg 1990 the weight is 1+N, where N is forward cites, so shift it up by one
// for every article that has a patent cite.
// NOTE(review): this adds 1 per article rather than 1 per citing patent -- confirm intended.
replace weightedcites = weightedcites + 1 if totalcites>0

// Indicator: at least one citing patent that itself has at least one forward citation
gen weightbinary = (weightedcites>1)

// "Average quality" of citing patents; zero when the article has no patent cites
gen averagequality = cond(totalcites>0, weightedcites/totalcites, 0)

// Indicator: article published after the NIH mandate begins (monthtag 40 onward)
gen afterapril08 = (monthtag>=40)

// Ever-cited indicators built from the corresponding citation counts
gen citedever = (totalcites>0)
gen grantedcitedever = (grantedcites>0)
gen grantedcited2017ever = (granted2016>0)
gen bigfamilyciteever = (twoormorefamilysize>0)
gen assignedcitedever = (assignedcites>0)
gen unassignedcitedever = (unassignedcites>0)

// Treatment indicator: NIH funded and published after the mandate
gen afterapril08nih = (afterapril08==1 & usphsfunding==1)

// Three-year-window versions of the ever-cited indicators
gen threeyearbinary = (threeyrcites>0)
gen threeyearassignedbinary = (threeyrassignedcites>0)
gen threeyearunassignedbinary = (threeyrunassignedcites>0)

/* Journals with allavailable==1 make almost everything from 2005 to 2012 available freely online as of 2014 */
/* Show journals that make 80% or more (cutoff could go down all the way to 30% with no change) of their
    non-NIH articles free online; call these allavailable=1 since they are unlikely to have been treated */
tab journaltag if afterapril08==1 & usphsfunding==0, sum(fftbinary)
gen allavailable = inlist(journaltag, 1, 4, 5, 8, 12, 15, 21, 23, 25, 27, 28, 29, 42)
/* Provide summary stats */

/* TOP PANEL TABLE 1 */
// One variable list, summarized for: full sample, first 12 months,
// allavailable==1 journals, and allavailable==0 journals.
local t1vars totalcites threeyearbinary academiccites citedever usphsfunding monthtag fftbinary
sum `t1vars'
sum `t1vars' if monthtag<=12
foreach flag in 1 0 {
    sum `t1vars' if allavailable==`flag'
}

/* TABLE A2 */
/* Even in pre period (monthtag<40), NIH funded work is cited more often */
foreach outcome in citedever totalcites {
    poisson `outcome' usphsfunding i.journaltag i.monthtag if monthtag<40, vce(robust)
}



/* --------------------- BASIC NONCAUSAL REGRESSIONS ------------------------------*/
/* Whole-dataset regressions: large and very significant effect of FFT even controlling
   for journal and funder, but not causally credible */
sum totalcites threeyearbinary academiccites citedever usphsfunding

/*  TABLE A1 */
// Same specification for each outcome
foreach outcome in totalcites citedever academiccites {
    poisson `outcome' fftbinary i.monthtag usphsfunding i.journaltag, vce(robust)
}
/* Adding article country dummies also does not change things */
xi: poisson citedever fftbinary i.monthtag usphsfunding i.journaltag i.articlecountrycode, vce(robust)

/* Monthly means of each outcome, computed separately for OA (fftbinary==1) and
   gated (fftbinary==0) articles; each series is missing on the other group's rows */
bysort monthtag: egen alldataoa = mean(citedever) if fftbinary==1
bysort monthtag: egen alldatanotoa = mean(citedever) if fftbinary==0
bysort monthtag: egen alldataacoa = mean(academiccites) if fftbinary==1
bysort monthtag: egen alldataacnotoa = mean(academiccites) if fftbinary==0
bysort monthtag: egen alldatatotaloa = mean(totalcites) if fftbinary==1
bysort monthtag: egen alldatatotalnotoa = mean(totalcites) if fftbinary==0

// Axis labels for monthtag. With 1 = Jan 2005 and 40 = Apr 2008, month 64 is Apr 2010
// (Jan 2010 would be 61), so the tick at 64 is labelled accordingly.
label define plotlabel 1 "Jan 2005" 25 "Jan 2007" 40 "Apr 2008" 64 "Apr 2010" 96 "Dec 2012"
label values monthtag plotlabel

/* FIGURE A4 */
// Panel 1: share of articles ever cited by a patent
twoway (scatter alldataoa monthtag, msize(vsmall) mcolor(gs1)) ///
       (scatter alldatanotoa monthtag, msize(vsmall) mcolor(gs10)), ///
       title("Raw Difference in Patent Citation Propensity") xtitle("Month of Publication") legend(off) ///
       xscale(range(0 100) axis(1)) xlabel(1 25 40 64 96,val) yscale(range(0 .5) axis(1)) ///
       ytitle("Propensity to be Cited by a Patent") ///
       note("Note: Black dots are OA articles, grey dots are gated", size(small) position(5))
graph export rawpatentcitedifference.png, replace width(4000)

// Panel 2: mean number of patent citations
twoway (scatter alldatatotaloa monthtag, msize(vsmall) mcolor(gs1)) ///
       (scatter alldatatotalnotoa monthtag, msize(vsmall) mcolor(gs10)), ///
       title("Raw Difference in Patent Citation Rates") xtitle("Month of Publication") legend(off) ///
       xscale(range(0 100) axis(1)) xlabel(1 25 40 64 96,val) yscale(range(0 .5) axis(1)) ///
       ytitle("Mean Number of Patent Citations") ///
       note("Note: Black dots are OA articles, grey dots are gated", size(small) position(5))
graph export rawpatenttotalcitedifference.png, replace width(4000)

// Panel 3: mean number of academic citations
twoway (scatter alldataacoa monthtag, msize(vsmall) mcolor(gs1)) ///
       (scatter alldataacnotoa monthtag, msize(vsmall) mcolor(gs10)), ///
       title("Raw Difference in Academic Citation Rates") xtitle("Month of Publication") legend(off) ///
       xscale(range(0 100) axis(1)) xlabel(1 25 40 64 96,val) yscale(range(0 .25) axis(1)) ///
       ytitle("Mean Number of Academic Citations") ///
       note("Note: Black dots are OA articles, grey dots are gated", size(small) position(5))
graph export rawacademiccitedifference.png, replace width(4000)



/* --------------------------------PRIMARY ESTIMATES----------------------------------*/
/* multiplicative treatment effect -> use poisson (ppml), though we show reg just to be clear */
/* Essentially, checking for journals which we expect to be affected whether there is a jump in the
   ratio of cites to NIH funded versus other articles after Apr 08 publication date */
/* Need multiplicative treatment effect b/c difference in cites declining over time due to patent apps still secret */

// Common right-hand side of the difference-in-differences specification
local didspec afterapril08nih i.monthtag i.journaltag usphsfunding

/* TABLE 2 */
/* First pass (allavailable==0) uses the affected journals; second pass (allavailable==1)
   repeats the regressions on the unaffected journals just to check for a null effect.
   Nil effect for academic cites. */
forvalues unaffected = 0/1 {
    foreach outcome in totalcites citedever threeyearbinary academiccites {
        poisson `outcome' `didspec' if allavailable==`unaffected', vce(robust)
    }
}

/* Forward-citation weighted cites */
/* Roughly similar magnitude, but super noisy */
/* TABLE A9 */
foreach outcome in weightedcites weightbinary {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}
// Average quality is only defined for cited articles, and controls for the number of cites
poisson averagequality afterapril08nih totalcites i.monthtag i.journaltag usphsfunding if allavailable==0 & totalcites>0, vce(robust)

/* Show half-year by half-year the treatment effect NIHxPublicationthathalfyear */
// Half-year bins are six-month windows of monthtag: (0,6], (6,12], ..., (90,96].
// The first bin has no lower bound, matching the original definition monthtag<=6.

// NIH-funded x half-year interactions: in20051nih ... in20122nih
local start = 0
forvalues yr = 2005/2012 {
    forvalues half = 1/2 {
        local stop = `start' + 6
        if `start'==0 {
            gen in`yr'`half'nih = (monthtag<=`stop' & usphsfunding==1)
        }
        else {
            gen in`yr'`half'nih = (monthtag>`start' & monthtag<=`stop' & usphsfunding==1)
        }
        local start = `stop'
    }
}

// Plain half-year indicators: in20051 ... in20122
local start = 0
forvalues yr = 2005/2012 {
    forvalues half = 1/2 {
        local stop = `start' + 6
        if `start'==0 {
            gen in`yr'`half' = (monthtag<=`stop')
        }
        else {
            gen in`yr'`half' = (monthtag>`start' & monthtag<=`stop')
        }
        local start = `stop'
    }
}

// Labels for the interaction terms from 2006 on: the first half of each year carries
// the year, the second half a single blank.
forvalues yr = 2006/2012 {
    label variable in`yr'1nih "`yr'"
    label variable in`yr'2nih " "
}

/* TABLE A4 */
/* coefficients are relative to 2005, in terms of IRR (so exp(poisson coefficient)) */

// Build the regressor list: the 14 NIH x half-year terms (2006h1-2012h2), then the 14 plain
// half-year dummies, then the quadratic month trend, journal dummies and NIH funding.
local nihterms
local halfterms
forvalues yr = 2006/2012 {
    forvalues half = 1/2 {
        local nihterms `nihterms' in`yr'`half'nih
        local halfterms `halfterms' in`yr'`half'
    }
}
local rhs `nihterms' `halfterms' monthtag monthtag2 i.journaltag usphsfunding

/* Pass 0: affected journals (allavailable==0), estimates stored without a suffix.
   Pass 1: same regressions by half-year using unaffected journals, stored with suffix 2.
   The threeyearbinary regression is run but not stored. */
forvalues unaffected = 0/1 {
    local tag = cond(`unaffected'==1, "2", "")
    poisson totalcites `rhs' if allavailable==`unaffected', vce(robust) irr
    estimates store totalcitecoeff`tag'
    poisson citedever `rhs' if allavailable==`unaffected', vce(robust) irr
    estimates store citedevercoeff`tag'
    poisson threeyearbinary `rhs' if allavailable==`unaffected', vce(robust) irr
    poisson academiccites `rhs' if allavailable==`unaffected', vce(robust) irr
    estimates store accitecoeff`tag'
}

/*   FIGURE 3  */
// Four panels built from the stored half-year estimates; only the NIH x half-year
// coefficients are kept, drawn with 90% intervals.
coefplot citedevercoeff,  bylabel(">1 Patent Cite, Affected Journals") ///
      || accitecoeff,     bylabel("Academic Cites, Affected Journals") ///
      || citedevercoeff2, bylabel(">1 Patent Cite, Unaffected") ///
      || accitecoeff2,    bylabel("Academic Cites, Unaffected") ///
      ||, keep(*nih) yline(0) levels(90) ylabel(-.4(.4).8) vertical
graph export coefficientsbyhalfyear.png, replace width(4000)




/* Compute OLS, logit, probit */
/* Compute difference in average marginal effect, then convert to percentage terms by dividing by average in treated group */
// The comment above was previously left unclosed. Stata block comments nest, so the missing
// terminator commented out everything from that line to the end of the file.

/* SIX REGRESSIONS ARE TABLE A10 */
// probit/logit treat any nonzero, nonmissing depvar as a positive outcome, so using
// totalcites here models the event "cited at least once".
probit totalcites i.afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)
margins, dydx(afterapril08nih)
logit totalcites i.afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)
margins, dydx(afterapril08nih)
// Treated-group averages, the denominator for the percentage conversion described above
sum totalcites citedever if allavailable==0 & afterapril08==1 & usphsfunding==1
/* Compute raw difference in means */
/* CITED IN ONLINE APPENDIX 2 */
// Four cells of the 2x2 (pre/post x NIH/non-NIH) within affected journals
sum totalcites citedever if allavailable==0 & afterapril08==0 & usphsfunding==1
sum totalcites citedever if allavailable==0 & afterapril08==0 & usphsfunding==0
sum totalcites citedever if allavailable==0 & afterapril08==1 & usphsfunding==1
sum totalcites citedever if allavailable==0 & afterapril08==1 & usphsfunding==0
// Linear (OLS) versions of the main specification
reg totalcites afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)
reg citedever afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)
/* Check log linearized OLS */
// ln(1+y) transforms: zero for uncited articles, ln(2) for the binary outcome when cited
gen lncitedever=0
replace lncitedever=ln(2) if citedever==1
gen lntotalcites=0
replace lntotalcites=ln(totalcites+1) if totalcites>0
reg lntotalcites afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)
reg lncitedever afterapril08nih i.monthtag i.journaltag usphsfunding if allavailable==0, vce(robust)

/* Check treatment effect by journal, and show NIH*postApril08 only affects journals in our set
   which are likely to have been affected by NIH open access policy */
tab journaltag if afterapril08==1, sum(usphsfunding)

// aft1 ... aft43: the treatment indicator interacted with each of the 43 journal tags.
// The list of names is collected along the way for use in the regressions.
local aftvars
forvalues j = 1/43 {
    gen aft`j' = (afterapril08nih==1 & journaltag==`j')
    local aftvars `aftvars' aft`j'
}

// Full-sample regressions with one treatment coefficient per journal; estimates are
// stored for the coefficient plot.
poisson totalcites `aftvars' i.monthtag i.journaltag usphsfunding, vce(robust)
estimates store totalcitejourbyjourcoeff
poisson citedever `aftvars' i.monthtag i.journaltag usphsfunding, vce(robust)
estimates store citedeverjourbyjourcoeff
poisson threeyearbinary `aftvars' i.monthtag i.journaltag usphsfunding, vce(robust)
estimates store threeyrcitejourbyjourcoeff
// Short journal names used as labels for aft1 ... aft43, listed in journaltag order
local j = 0
foreach jname in "NEJM" "Lancet" "JAMA" "J Exp M" "J Clin I" "Neuron" "Natur Med" "Circulat" ///
                 "J Clin O" "Nature Imm" "Immunity" "Blood" "Gastroent" "J Am Col" "J Neurosci" ///
                 "Nature Neu" "NeuroImage" "Cancer C" "Oncogene" "Hepatolo" "Genome R" "Biol Psy" ///
                 "Cancer R" "J Neuroc" "Arthriti" "Lancet N" "Clin Can" "Clin Infec" "Brain" ///
                 "J Allerg" "Neurolog" "Cell Stem" "Lancet O" "Nat Biol" "Trends B" "Appl Mic" ///
                 "Biotech B" "Tissue Eng" "J Biotech" "J Neural" "Biotechnol B" "Biotechnique" ///
                 "Transgen" {
    local ++j
    label variable aft`j' "`jname'"
}
/* Drop 35 41 43 from graph b/c small sample size leads to huge error bars */

/* FIGURE A5 */
// Coefficients are ordered with the allavailable==0 journals first and the
// allavailable==1 journals second, each group under its own heading.
coefplot totalcitejourbyjourcoeff, bylabel("OA Effect by Journal: Total Cites") ///
      || citedeverjourbyjourcoeff, bylabel(">1 Cite") ///
      ||, keep(aft*) ///
          order(aft2 aft3 aft6 aft7 aft9 aft10 aft11 aft13 aft14 aft16 aft17 aft18 aft19 aft20 ///
                aft22 aft24 aft26 aft30 aft31 aft32 aft33 aft34 aft36 aft37 aft38 aft39 aft40 ///
                aft1 aft4 aft5 aft8 aft12 aft15 aft21 aft23 aft25 aft27 aft28 aft29 aft42) ///
          drop(aft35 aft41 aft43 afterapril08) ///
          headings(aft2="{bf:Affected Journals}" aft1="{bf:Unaffected Journals}") ///
          xline(0) xlabel(-.4(.4)1.6)
graph export coefficientsbyjournal.png, replace width(4000)

/* Check subsamples */
// Ever-cited indicators (count >= 1) for each patent-count variable
foreach kind in corppat univpat indivpat govtpat hosppat {
    gen `kind'ever = (`kind'>=1)
}

// "Small corporate": corporate cites in excess of the majorbiotechpat count
// (narrow and broad variants), as an indicator and as a count
gen smallcorpever = (corppat>majorbiotechpat)
gen smallcorpbroadever = (corppat>majorbiotechpatbroad)
gen smallcorptotalcites = corppat-majorbiotechpat
gen smallcorpbroadtotalcites = corppat-majorbiotechpatbroad

// "Large corporate": at least one cite counted in majorbiotechpat (narrow and broad)
gen largecorpever = (majorbiotechpat>=1)
gen largecorpbroadever = (majorbiotechpatbroad>=1)

// Inventor-team-size and multi-class indicators
foreach kind in threeplusinv fiveplusinv twominusinv multiclasspat {
    gen `kind'ever = (`kind'>=1)
}

// Region means either the state or the country are the same
foreach place in sameregion samecountry diffregion diffcountry {
    gen `place'ever = (`place'pat>=1)
}

// Common right-hand side shared by every regression in this section
local didspec afterapril08nih i.monthtag i.journaltag usphsfunding

/* TABLE A6 */
foreach outcome in assignedcitedever unassignedcitedever sameregionever diffregionever bigfamilyciteever {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}

/* TABLE A7 */
foreach outcome in univpat corppat univpatever corppatever {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}
/* Problem is how small cuts go: imagine you expect biggest effect from small private sector assignees */
/* "small and broad" means 10 or fewer patents in our dataset */
foreach outcome in smallcorpbroadtotalcites smallcorpbroadever {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}
/* comparing 292 to 473 patents citing one of these articles in an affected journal in post-period */
/* And "small" is not measured well: academic consultants, etc. are unobserved in data */


// Nearly precise zeros for all of these subgroups if we look at unaffected journals
/* TABLE A5 */
foreach outcome in assignedcitedever unassignedcitedever corppatever univpatever {
    poisson `outcome' `didspec' if allavailable==1, vce(robust)
}

/* Check effect of distance */
/* Region means same state or if outside US, same country - not enough power to know whether this matters more at distance or not */
/* positive and significant for both local and to other regions */
foreach outcome in samecountryever sameregionever diffcountryever diffregionever {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}

/* much larger effect for small number of inventors on patent */
/* Table A8 */
// Indicators first, then the underlying counts
foreach outcome in threeplusinvever fiveplusinvever twominusinvever threeplusinv fiveplusinv twominusinv {
    poisson `outcome' `didspec' if allavailable==0, vce(robust)
}

/* construct graphs showing multiplicative treatment effect for patents and not for academia */
// For each outcome: split it into an NIH-only and a non-NIH-only copy (missing on the other
// group's rows), take monthly means within the chosen journal set, and form the NIH / non-NIH ratio.

// Propensity to be cited by a patent, affected journals (allavailable==0)
gen patnih = citedever if usphsfunding==1
gen patnotnih = citedever if usphsfunding==0
by monthtag, sort: egen nihmean = mean(patnih) if allavailable==0
by monthtag, sort: egen notnihmean = mean(patnotnih) if allavailable==0
gen nihratio = nihmean/notnihmean

// Generate "placebo" propensity picture for journals that are "untreated" by NIH policy
by monthtag, sort: egen placebonihmean = mean(patnih) if allavailable==1
by monthtag, sort: egen placebonotnihmean = mean(patnotnih) if allavailable==1
gen placebonihratio = placebonihmean/placebonotnihmean

// Total patent cites, affected journals
gen totalpatnih = totalcites if usphsfunding==1
gen totalpatnotnih = totalcites if usphsfunding==0
by monthtag, sort: egen totalnihmean = mean(totalpatnih) if allavailable==0
by monthtag, sort: egen totalnotnihmean = mean(totalpatnotnih) if allavailable==0
gen totalnihratio = totalnihmean/totalnotnihmean

// Academic cites, affected journals
gen totalacpatnih = academiccites if usphsfunding==1
gen totalacpatnotnih = academiccites if usphsfunding==0
by monthtag, sort: egen totalacnihmean = mean(totalacpatnih) if allavailable==0
by monthtag, sort: egen totalacnotnihmean = mean(totalacpatnotnih) if allavailable==0
gen totalacnihratio = totalacnihmean/totalacnotnihmean

// Ratio by publication year (12-month windows of monthtag).
// The windows previously used monthtag>13, >25, ... which silently dropped the first month
// of every year after the first (13, 25, 37, 49, 61, 73); each window now covers all 12 months.
sum nihratio if monthtag<=12
forvalues lastmonth = 24(12)84 {
    sum nihratio if monthtag>`lastmonth'-12 & monthtag<=`lastmonth'
}
// Ratio before and after the mandate (monthtag 40)
sum nihratio if monthtag<=39
sum nihratio if monthtag>=40

label define plotlab2 1 "Jan 2005" 40 "Apr 2008" 73 "Jan 2011"
label values monthtag plotlab2

/* FIGURE 2 */
// Options shared by all four panels. name(..., replace) lets the script be re-run in the
// same session without a "graph already exists" error.
local didopts legend(off) ytitle("") xline(40) xline(38,lcolor(pink)) xline(42,lcolor(pink)) yscale(range(1 5) axis(1)) ylabel(1 2 3 4 5 6, axis(1))
twoway (scatter nihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title(">1 Patent Cite, Affected Journals") xtitle("") xlabel(, nolabels) ///
    `didopts' name(plot1, replace)
twoway (scatter totalnihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title("Total Cites, Affected Journals") xtitle("") xlabel(, nolabels) ///
    `didopts' name(plot2, replace)
twoway (scatter placebonihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title(">1 Patent Cite, Unaffected Journals") xtitle("Month of Publication") xlabel(1 40 73,val) ///
    `didopts' name(plot3, replace)
twoway (scatter totalacnihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title("Academic Cites, Affected Journals") xtitle("Month of Publication") xlabel(1 40 73,val) ///
    `didopts' name(plot4, replace)
graph combine plot1 plot2 plot3 plot4
graph export combinedDiDplot.png, replace width(4000)

/* FIGURE A2 */
// Plot probability of NIH funding by month, and show OA effect on treated journals
by monthtag, sort: egen probnihfunded = mean(usphsfunding)
// The ticks requested below (1 25 40 64) are labelled in plotlabel, not in plotlab2, which is
// the label set attached to monthtag at this point. Attach plotlabel for this figure so that
// ticks 25 and 64 do not print as bare numbers, then restore plotlab2 for the later figures.
label values monthtag plotlabel
twoway (line probnihfunded monthtag), ///
    title("NIH funding probability does not change in 2008") xlabel(1 25 40 64, val) ///
    xline(40) xline(38,lcolor(pink)) yscale(range(0,1)) ///
    xtitle("Month of Publication") ytitle("Probability an article is NIH funded") ///
    ylabel(0 .2 .4 .6 .8 1) xline(42,lcolor(pink))
label values monthtag plotlab2
graph export DiDNIHFundingprob.png, replace width(4000)

// Plot probability of OA as of June 2013; this means that articles published after monthtag 84
// in our sample may not appear "free text" yet.
// We do not use this variable directly in estimation, but rather as a proxy for historical
// free availability by journal.

/* FIGURE 1 */
// Monthly share of OA articles in affected journals through monthtag 84:
// fulltext = not NIH funded, fulltext2 = NIH funded
bysort monthtag: egen fulltext = mean(fftbinary) if usphsfunding==0 & allavailable==0 & monthtag<=84
bysort monthtag: egen fulltext2 = mean(fftbinary) if usphsfunding==1 & allavailable==0 & monthtag<=84
twoway (line fulltext monthtag if monthtag<=84) ///
       (line fulltext2 monthtag if monthtag<=84), ///
       title("OA Change following NIH Policy") xtitle("Month of Publication") legend(off) ///
       xscale(range(1,84)) xlabel(1 40 73,val) ///
       xline(40) xline(38,lcolor(pink)) xline(42,lcolor(pink)) ///
       ylabel(.2 .4 .6 .8 1, axis(1)) ytitle("Probability an Article is Open Access") ///
       note("Top line are NIH funded articles, bottom line are all others", size(small) position(5))
graph export DiDOAprobchange.png, replace width(4000)

// Same construction as the previous figure, but open access is defined as the article being
// available on PMC within 18 months of publication (pmcwithin18months).

/* FIGURE A3 */
local figA3sample allavailable==0 & monthtag<=84
bysort monthtag: egen fulltext3 = mean(pmcwithin18months) if usphsfunding==0 & `figA3sample'
bysort monthtag: egen fulltext4 = mean(pmcwithin18months) if usphsfunding==1 & `figA3sample'
// fulltext3 = non-NIH articles, fulltext4 = NIH-funded articles
twoway (line fulltext3 monthtag if monthtag<=84) ///
       (line fulltext4 monthtag if monthtag<=84), ///
    title("OA Change following NIH Policy, Alternative Definition") ///
    xtitle("Month of Publication") ///
    legend(off) ///
    xscale(range(1,84)) ///
    xlabel(1 25 40 64,val) ///
    xline(40) xline(38,lcolor(pink)) xline(42,lcolor(pink)) ///
    ylabel(.2 .4 .6 .8 1, axis(1)) ///
    ytitle("Probability an Article is in PMC within 18 months") ///
    note("Red line is NIH funded, black line is all others", size(small) position(5))
graph export DiDOAPMCprobchange.png, replace width(4000)





/* Build the indicators for a fully saturated triple difference, motivated by the prior results: */
/* nil effects when using "unaffected" journals, positive effects when using affected journals. */
/* The triple difference is post-April08 x NIH funded x affected journal. */
/* If the result were due just to NIH pushing applied work, this triple difference would be a null result. */

// Each logical expression evaluates to 1 when true and 0 otherwise, giving 0/1 dummies in one step.
// Affected journal = journal with allavailable==0
gen affectedjournal = (allavailable==0)
// Two-way interactions involving the affected-journal dummy
gen affectedjournalafterapril08 = (affectedjournal==1 & afterapril08==1)
gen affectedjournalnih = (affectedjournal==1 & usphsfunding==1)
// Three-way interaction: the coefficient of interest in the triple-difference regressions
gen affectedjournalafterapril08nih = (affectedjournal==1 & usphsfunding==1 & afterapril08==1)
/* affectedjournal will be dropped because it is collinear with the journal dummies below */
/* Result: in addition to placebo estimate, formal triple diff esp. on cited ever specification shows */
/* Poisson with robust s.e. gives the same result as Santos-Silva "Log of Gravity" (stata: ppml) Poisson pseudo maximum likelihood */
/* As throughout, the difference in standard errors for "totalcites" with robust s.e. is large, so following King and Roberts 2014 we focus on a transformation that removes this skew while still retaining the parameters of interest: citedever */

/* TABLE 3 */
// Triple-difference right-hand side: three-way interaction first, then month and journal
// fixed effects, then all lower-order terms. The same specification is run for every outcome.
local tripdiff affectedjournalafterapril08nih i.monthtag i.journaltag usphsfunding afterapril08 affectedjournal affectedjournalafterapril08 afterapril08nih affectedjournalnih
foreach outcome in totalcites citedever threeyearbinary academiccites {
    poisson `outcome' `tripdiff', vce(robust)
}


/* Re-estimate using only the window of 2 years before and after the policy (monthtag 16 through 64) */

/* TABLE A3 */
local window monthtag>=16 & monthtag<=64
local did afterapril08nih i.monthtag i.journaltag usphsfunding afterapril08
// Diff-in-diff within the window: first allavailable==0 journals, then the
// allavailable==1 placebo journals; citedever then totalcites in each sample.
forvalues unaffected = 0/1 {
    foreach outcome in citedever totalcites {
        poisson `outcome' `did' if allavailable==`unaffected' & `window', vce(robust)
    }
}
/* Triple difference within the same 2-year window, pooling both journal groups */
local tripdiffwin affectedjournalafterapril08nih i.monthtag i.journaltag usphsfunding afterapril08 affectedjournal affectedjournalafterapril08 afterapril08nih affectedjournalnih
foreach outcome in citedever totalcites {
    poisson `outcome' `tripdiffwin' if `window', vce(robust)
}

/* Check granted cites */
// Compare the share of articles cited at all with the shares cited by granted patents
sum citedever grantedcitedever grantedcited2017ever 
/* Redoing granted using Aug 2017 so half of cites are kept and time worry is less severe shows results very similar to main findings */
/* Table A12, columns 3 and 4 */
// The comparison with the main findings requires the affected-journal sample (allavailable==0),
// which is also the sample used for Table A12 columns 1 and 2. These two regressions previously
// selected allavailable==1, i.e. the unaffected (placebo) journals.
// NOTE(review): confirm against the published Table A12 that columns 3-4 are the affected-journal sample.
poisson grantedcited2017ever afterapril08nih i.monthtag i.journaltag usphsfunding afterapril08 if allavailable==0, vce(robust)
poisson granted2016 afterapril08nih i.monthtag i.journaltag usphsfunding afterapril08 if allavailable==0, vce(robust)

/* Restricting to patents with simultaneous applications in multiple countries doesn't change things */
sum bigfamilyciteever
// Same diff-in-diff specification, run first on allavailable==0 journals and then on
// allavailable==1 journals, for each of the two outcomes.
// NOTE(review): if the dataset has no variable literally named "bigfamily", Stata's variable
// abbreviation may resolve it to bigfamilyciteever, making the two outcomes identical -
// confirm whether a count variable (e.g. twoormorefamilysize) was intended.
local famspec afterapril08nih i.monthtag i.journaltag usphsfunding
forvalues unaffected = 0/1 {
    foreach outcome in bigfamily bigfamilyciteever {
        poisson `outcome' `famspec' if allavailable==`unaffected', vce(robust)
    }
}

/* To see problem with Log and standard diff-in-diff with many zeroes: with the base rate falling between the pre and post period, the error introduced
   by adding a 1 (or other term) to the log in order to permit log-linearization overwhelms the positive multiplicative trend identified in Fig 6-8 and
   in the corresponding ppml estimations.  The following quick and dirty example shows what is going wrong.  */
// Use ln(1+cites): this is the "add 1" transformation the example is about. Plain ln(totalcites)
// is missing for every zero-cite article, which would silently drop the zeroes from the means
// and from the regression instead of illustrating the problem.
gen lncites=ln(1+totalcites)
// Split the logged outcome by NIH funding status (missing for the other group)
gen lntotalcitesNIH=.
replace lntotalcitesNIH=lncites if usphsfunding==1
gen lntotalcitesnotNIH=.
replace lntotalcitesnotNIH=lncites if usphsfunding==0
// Monthly means of the logged outcome within allavailable==0 journals
by monthtag, sort: egen meanlncitesNIH = mean(lntotalcitesNIH) if allavailable==0
by monthtag, sort: egen meanlncitesnotNIH = mean(lntotalcitesnotNIH) if allavailable==0
// NOTE(review): the numeric values quoted in the comments below are the author's recorded results;
// re-check them against the output of the sum/reg commands after any change to lncites.
// NOTE(review): the pre/post split here is monthtag 42, whereas afterapril08 switches on at monthtag 40 - confirm intended.
/* Absolute difference in mean cites in pre period between NIH and non-NIH articles in "affected journals" is .74-.64=.1 */
sum meanlncitesNIH if monthtag<42
sum meanlncitesnotNIH if monthtag<42
/* In post period difference is .40-.33=.07, so diff in diff is roughly -.03 */
sum meanlncitesNIH if monthtag>=42
sum meanlncitesnotNIH if monthtag>=42
/* diff-in-diff finds precisely that difference, -.03, even with controls */
reg lncites afterapril08nih i.monthtag  i.journaltag usphsfunding if allavailable==0, vce(robust)


/* -------------------- FRONT PAGE CITATIONS -------------------------------------*/
/* No effect when using front page cites for our main regression */
use Nov2017FrontPageMatchesStata, clear
/* Keep papers from 1/1/2005 onward (drops the small number with a lower pubmedid) */
keep if pubmedid>=15626732
/* Drop if application after Nov 2014, for comparability across samples */
// NOTE(review): no command here actually drops post-Nov-2014 applications; confirm whether
// the input file is already filtered or a drop statement is missing.
// One row per paper: totalcites = number of non-missing patentpubnum values for that pubmedid
collapse (count) totalcites=patentpubnum, by(pubmedid)
/* Attach the collapsed front page counts to the list of papers */
merge 1:m pubmedid using Sept2015Listofpapers
rename monthtagjan20051 monthtag
// Keep months 1-96. Rows with missing monthtag are removed as well, which covers
// pubmedids that appear only in the front page file (monthtag comes from the paper list).
keep if monthtag<=96
// Papers with no front page match have missing counts after the merge; set them to zero
replace totalcites=0 if totalcites==.
// 0/1 indicators, built directly from logical expressions
gen afterapril08 = (monthtag>=40)
gen citedever = (totalcites>0)
// Journals flagged allavailable==1, identified by journaltag code
gen allavailable = inlist(journaltag,1,4,5,8,12,15,21,23,25,27,28,29,42)
gen afterapril08nih = (afterapril08==1 & usphsfunding==1)
/* 13.2% of articles have been cited on the front page at least once */
/* No impact on cites or on >1 cite using front page citations */

/* TABLE A12, COLUMN 1 AND 2 */
// Diff-in-diff on allavailable==0 journals: count outcome first, then the ever-cited binary
local fpspec afterapril08nih i.monthtag i.journaltag usphsfunding
foreach outcome in totalcites citedever {
    poisson `outcome' `fpspec' if allavailable==0, vce(robust)
}

/* Build the monthly NIH / non-NIH ratio series used to show the multiplicative treatment effect for patents and not for academia */
// Pattern for each outcome: copy the outcome into one variable for NIH-funded articles and one
// for all others (missing elsewhere), take monthly means within a journal group, then divide.

// Ever cited, allavailable==0 journals
gen patnih = citedever if usphsfunding==1
gen patnotnih = citedever if usphsfunding==0
bysort monthtag: egen nihmean = mean(patnih) if allavailable==0
bysort monthtag: egen notnihmean = mean(patnotnih) if allavailable==0
gen nihratio = nihmean/notnihmean

// "Placebo" version of the same ratio for allavailable==1 journals ("untreated" by the NIH policy)
bysort monthtag: egen placebonihmean = mean(patnih) if allavailable==1
bysort monthtag: egen placebonotnihmean = mean(patnotnih) if allavailable==1
gen placebonihratio = placebonihmean/placebonotnihmean

// Total citation counts, allavailable==0 journals
gen totalpatnih = totalcites if usphsfunding==1
gen totalpatnotnih = totalcites if usphsfunding==0
bysort monthtag: egen totalnihmean = mean(totalpatnih) if allavailable==0
bysort monthtag: egen totalnotnihmean = mean(totalpatnotnih) if allavailable==0
gen totalnihratio = totalnihmean/totalnotnihmean

// Academic citation counts, allavailable==0 journals
// NOTE(review): academiccites is not created in this section, so it has to come from the merged
// paper list - confirm it exists there. totalacnihratio is not plotted in the Figure A7 commands below.
gen totalacpatnih = academiccites if usphsfunding==1
gen totalacpatnotnih = academiccites if usphsfunding==0
bysort monthtag: egen totalacnihmean = mean(totalacpatnih) if allavailable==0
bysort monthtag: egen totalacnotnihmean = mean(totalacpatnotnih) if allavailable==0
gen totalacnihratio = totalacnihmean/totalacnotnihmean


/* FIGURE A7 */
// The three front page scatter plots share x-axis, legend, reference-line and y-scale options;
// each differs only in the plotted ratio, its title, its y-axis title and the export file name.
local fpopts xtitle("Month of Publication") legend(off) xlabel(1 25 40 64,val) xline(40) xline(38,lcolor(pink)) xline(42,lcolor(pink)) yscale(range(1 5) axis(1)) ylabel(1 2 3 4 5, axis(1))

// Ever-cited ratio, allavailable==0 journals
twoway (scatter nihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title("FrontPage Cite Propensity Ratio, NIH vs. non-NIH") `fpopts' ///
    ytitle("Ratio of Patent Citation Propensity, NIH vs. non-NIH")
graph export DiDfrontpagepatentcites.png, replace width(4000)

// Ever-cited ratio, allavailable==1 (placebo) journals
twoway (scatter placebonihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title("FrontPage Cite Propensity Ratio in Untreated Journals") `fpopts' ///
    ytitle("Ratio of Patent Citation Propensity, NIH vs. non-NIH")
graph export DiDfrontpageplacebopatentcites.png, replace width(4000)

// Total citation count ratio, allavailable==0 journals
twoway (scatter totalnihratio monthtag, msize(vsmall) mcolor(gs1)), ///
    title("FrontPage Patent Citation Ratio, NIH vs. non-NIH") `fpopts' ///
    ytitle("Ratio of Total Patent Citations, NIH/non-NIH")
graph export DiDfrontpagetotalCites.png, replace width(4000)


